set.seed(12345)
# Load required libraries
library(reshape2)
library(dada2)
library(vegan)
library(ggplot2)
library(GUniFrac)
library(ggpubr)
library(readxl)
library(RColorBrewer)
library(seqRFLP)
library(gtools)
library(dplyr)
# Custom functions used in this analysis
# For loading R objects from other environments
loadER <- function (rdata, object) {
local({
load(rdata)
eval(parse(text = object))
})
}
# Function mellt.dist to melt distance objects
melt.dist <- function(x,only_identity = FALSE,omit_identity=TRUE) {
library(reshape2)
if(!is.matrix(x)) {
x <- as.matrix(x)
}
y <- melt(x)
if(omit_identity) {
l <- list();
nr <- nrow(x);
nin <- c();
for(i in 0:(nr - 1)){
rs <- (i * nr)+1;
if(only_identity==FALSE) {
l[[i+1]] <- (rs+i):(rs+nr-1)
} else {
l[[i+1]] <- (rs+i)
}
}
l <- -c(unlist(l))
y <- y[l,]
}
return(y)
}
# Function to melt distance, add same/diff family info, and tabulate into dataframes
AD.melt.dist.sf <- function(asv_table,meta){
# Calculate Distance matrix
dist <- vegdist(asv_table, method = "bray")
dist1 <- vegdist(asv_table, method = "jaccard")
# Melt dist
dist_melt <- melt.dist(dist) # BC
dist1_melt <- melt.dist(dist1) # JC
# Add relevant meta fields
dist_melt$fam1 <- meta$family_id[match(dist_melt$Var1, meta$sample)] # BC
dist_melt$fam2 <- meta$family_id[match(dist_melt$Var2, meta$sample)] # BC
dist1_melt$fam1 <- meta$family_id[match(dist1_melt$Var1, meta$sample)] # JC
dist1_melt$fam2 <- meta$family_id[match(dist1_melt$Var2, meta$sample)] # JC
# Adding same/diff markers
dist_melt$family <- "Same Family" # BC
dist_melt[dist_melt$fam1!=dist_melt$fam2,]$family <- "Diff Family" # BC
dist1_melt$family <- "Same Family" # JC
dist1_melt[dist1_melt$fam1!=dist1_melt$fam2,]$family <- "Diff Family" # JC
# Subset for samefam
dist_melt_sf <- dist_melt[dist_melt$family=="Same Family",] # BC
dist_melt_sf$status <- meta$status[match(dist_melt_sf$Var2, meta$sample)] # BC
dist1_melt_sf <- dist1_melt[dist1_melt$family=="Same Family",] # JC
dist1_melt_sf$status <- meta$status[match(dist1_melt_sf$Var2, meta$sample)] # JC
# Subset for different family
dist_melt_df <- dist_melt[dist_melt$family=="Diff Family",] # BC
dist_melt_df$status <- "Unrelated"
dist1_melt_df <- dist1_melt[dist1_melt$family=="Diff Family",] # JC
dist1_melt_df$status <- "Unrelated"
# Output
list(
dm = dist_melt,
dmsf = dist_melt_sf,
dmdf = dist_melt_df,
jm = dist1_melt,
jmsf = dist1_melt_sf,
jmdf = dist1_melt_df
)
}
# Function to calculate P value using the permutation method
# (alternative to Wilcox test when one group is unrelated subjects)
perm_p_value <- function(otu_table, meth){
set.seed(12345)
t = 1000 # permutation times
sub_n = nrow(otu_table)/2 #'sub_n' is the sample size of off-diagnonal subsample,
# let it be the same as diagonal entries (=48)
# input table,
# note: the data of paired mom and child, should be adjacent in the table,
# mom or child always comes first, e.g M1,C1,M2,C2...
mc_data <- otu_table
#check pairing
if(nrow(mc_data)%%2==1){stop('Error: number of subject is odd.')}
n_pair <- nrow(mc_data)/2 # number of pairs
#compute full B-C dissimilarity matrix
dist_full_matrix <- vegdist(mc_data, method = meth) %>% as.matrix()
# dist btw mom and child
D <- dist_full_matrix[(1:n_pair)*2,((1:n_pair)*2-1)]
# observed test statistic
median_diff_obs <- median(D[row(D)>col(D)]) - median(diag(D))
# permuation
stat_perm <- rep(0,t)
for(i in 1:t){
D_perm <- D[,sample(n_pair)] # permute childrens
# compute statistics from permutation (only use sub_n off-diag entries for lower power)
stat_perm[i] <- median(sample(D_perm[row(D_perm)>col(D_perm)],sub_n)) - median(diag(D_perm))
}
#compute p.value
stat_perm_mean <- mean(stat_perm)
# p.value is proportion of observed test stat. more extreme than
# the permutated test statistics, which form the null distribution
p.value_diff <- mean((stat_perm-stat_perm_mean)^2>(median_diff_obs-stat_perm_mean)^2)
# Convert to star
perm_pval_star <- stars.pval(p.value_diff)[1]
# Set label as "ns" when not significant
if (perm_pval_star == "." | perm_pval_star == " "){
perm_pval_star = "ns"
}
# Output
list(
pval = p.value_diff,
pval_star = perm_pval_star
)
}
# Converting Wilcox test p values to star symbols
wilcox_pval_star <- function(value, grp){
# Load required library
library(gtools) # for stars pval function
# Set seed
set.seed(12345)
# Wilcox test
w_pval <- wilcox.test(value ~ grp, exact=F)$p.value
# Convert to star
w_pval_star <- stars.pval(w_pval)[1]
# Set label as "ns" when not significant
if ( w_pval_star == "." | w_pval_star == " "){
w_pval_star = "ns"
}
# Output
list(
pval = w_pval,
pval_star = w_pval_star
)
}
# Function for calculating distance for individual species
species_dist_calc_sal <- function(otu_name){
# For mother-child comparisons
# Subset for genus
species_counts_mch <- ISR_sal_matched_mch[,colnames(ISR_sal_matched_mch) %in% row.names(ISR_blast_matches_sal[ISR_blast_matches_sal$otu1==otu_name,])]
# For presence/absence
# Add "other" column
species_counts_mch$other <- rowSums(ISR_sal_matched_mch) - rowSums(species_counts_mch)
# Rarefy
set.seed(12345); species_counts_mch_rar <- as.data.frame(rrarefy(species_counts_mch, min(rowSums(species_counts_mch))))
# Remove other column
species_counts_mch_rar$other <- NULL
species_counts_mch$other <- NULL
# Presence/Absence
species_prabs <- data.frame((species_counts_mch_rar > 0)*1)
# Calculate dist for sf plots
species_prab_sfd <- AD.melt.dist.sf(species_prabs, ISR_sal_meta_mch)$dmsf
species_prab_sfd$otu <- otu_name
return(species_prab_sfd)
}
(See Adoption Data Filtering Script for details)
ISR_seqtab_atab_comb_filtp_clean <- read.table(file="Input_files/ISR_seqtab_atab_comb_filtp_clean.txt", sep="\t", header = T)
dim(ISR_seqtab_atab_comb_filtp_clean) # 778 4067
## [1] 778 4067
# Rarefying raw counts for converting to presence/absence
# Rarefy counts
set.seed(12345); ISR_seqtab_atab_comb_filtp_clean_rar <- as.data.frame(rrarefy(ISR_seqtab_atab_comb_filtp_clean, min(rowSums(ISR_seqtab_atab_comb_filtp_clean))))
# Convert Rarified counts to Pr/Ab:
ISR_seqtab_atab_comb_filtp_clean_prab <- data.frame((ISR_seqtab_atab_comb_filtp_clean_rar > 0)*1)
# All ISR Samples clinical metadata (See AD_clinical_metadata_processing.R)
AD_clinical_meta_2019_ISR <- read.table(file="Input_files/AD_clinical_meta_2019_ISR.txt", sep="\t", header = T)
dim(AD_clinical_meta_2019_ISR) # 265 24
## [1] 265 24
# Mother-Child ISR Samples clinical metadata (age matched)
AD_clinical_meta_2019_ISR_mch <- read.table(file="Input_files/AD_clinical_meta_2019_ISR_mch.txt", sep="\t", header = T)
dim(AD_clinical_meta_2019_ISR_mch) # 210 24
## [1] 210 24
### NMDS plot based on sites and mother/child groups
#### Compute NMDS and PERMANOVA
# Create table for all ISR samples sequenced
all_site_samples <- data.frame(row.names(ISR_seqtab_atab_comb_filtp_clean))
colnames(all_site_samples) <- "samples"
all_site_samples$sample <- all_site_samples$samples
all_site_samples$sample <- gsub("_P1_ISR", "", all_site_samples$sample)
all_site_samples$sample <- gsub("_P2_ISR", "", all_site_samples$sample)
all_site_samples$sample <- gsub("_S_ISR", "", all_site_samples$sample)
# Add subject info
all_site_samples$type <- AD_clinical_meta_2019_ISR$type[match(all_site_samples$sample, AD_clinical_meta_2019_ISR$sample)]
# Add site info
all_site_samples$site <- as.character(all_site_samples$samples)
all_site_samples$site[grepl("P1", all_site_samples$site)] <- "Supragingival Plaque"
all_site_samples$site[grepl("P2", all_site_samples$site)] <- "Subgingival Plaque"
all_site_samples$site[grepl("_S", all_site_samples$site)] <- "Soft Tissue/Saliva Swab"
# Subset for mother-child only
all_site_samples_mch <- all_site_samples[all_site_samples$type == "Mother" | all_site_samples$type == "Child", ]
nrow(all_site_samples_mch) # 710
# Subset counts
ISR_all_sites_mch_counts <- ISR_seqtab_atab_comb_filtp_clean[row.names(ISR_seqtab_atab_comb_filtp_clean) %in% all_site_samples_mch$samples, ]
ISR_all_sites_mch_counts <- ISR_all_sites_mch_counts[,colSums(ISR_all_sites_mch_counts) > 0]
dim(ISR_all_sites_mch_counts) # 710 3949
# Rarefy counts
set.seed(12345); ISR_all_sites_mch_counts_rar <- as.data.frame(rrarefy(ISR_all_sites_mch_counts, min(rowSums(ISR_all_sites_mch_counts))))
# Convert Rarified counts to Pr/Ab:
ISR_all_sites_mch_counts_prab <- data.frame((ISR_all_sites_mch_counts_rar > 0)*1)
# Compute NMDS
set.seed(12345); ISR_all_sites_mch_counts_prab.mds <- metaMDS(ISR_all_sites_mch_counts_prab, trymax = 200, autotransform = F, wascores = F)
# Making dataframe for plotting
ISR_all_sites_mch_counts_prab.mds.df <- data.frame(scores(ISR_all_sites_mch_counts_prab.mds, display = 'sites'))
# Add subject type info
ISR_all_sites_mch_counts_prab.mds.df$type <- all_site_samples$type[match(row.names(ISR_all_sites_mch_counts_prab.mds.df), all_site_samples$samples)]
ISR_all_sites_mch_counts_prab.mds.df$site <- all_site_samples$site[match(row.names(ISR_all_sites_mch_counts_prab.mds.df), all_site_samples$samples)]
# View(ISR_all_sites_mch_counts_prab.mds.df)
# Permanova
set.seed(12345); (ISR_all_sites_mch_mds_df.mch.perm <- adonis(formula = ISR_all_sites_mch_counts_prab ~ ISR_all_sites_mch_counts_prab.mds.df$type, strata = ISR_all_sites_mch_counts_prab.mds.df$site))
# 0.001 ***
set.seed(12345); (ISR_all_sites_mch_mds_df.site.perm <- adonis(formula = ISR_all_sites_mch_counts_prab ~ ISR_all_sites_mch_counts_prab.mds.df$site, strata = ISR_all_sites_mch_counts_prab.mds.df$type))
# 0.001 ***
Strain Level Sites & Subject Groups Ordination
# Saliva/Swab Samples
# Subset Data for ISR Saliva/Swab
ISR_sal_all <- ISR_seqtab_atab_comb_filtp_clean[grepl("_S", row.names(ISR_seqtab_atab_comb_filtp_clean)),]
ISR_sal <- ISR_sal_all[,colSums(ISR_sal_all) > 0]
dim(ISR_sal) # 262 2094
## [1] 262 2094
# Remove _S for matching with Metadata
row.names(ISR_sal) <- gsub("_S_ISR" , "", row.names(ISR_sal))
# Subset meta for saliva samples
ISR_sal_meta_all_mch <- AD_clinical_meta_2019_ISR_mch[AD_clinical_meta_2019_ISR_mch$sample %in% row.names(ISR_sal), ]
dim(ISR_sal_meta_all_mch) # 208 x 24
## [1] 208 24
# Remove unbalanced grps
ISR_sal_meta_all_mch <- ISR_sal_meta_all_mch[!(ISR_sal_meta_all_mch$family_id %in% names(which(table(ISR_sal_meta_all_mch$family_id) < 2))), ]
dim(ISR_sal_meta_all_mch) # 206 x 24
## [1] 206 24
# Final composition
table(ISR_sal_meta_all_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 49 49 54 54
# Adopted Child Adopted Mother Biological Child Biological Mother
# 49 49 54 54
# Make Final Meta for Sal
ISR_sal_meta_mch <- ISR_sal_meta_all_mch
# Reset order
ISR_sal_meta_mch <- ISR_sal_meta_mch[order(ISR_sal_meta_mch$sample), ]
# Test age balance
ISR_sal_meta_mch_ch <- ISR_sal_meta_mch[ISR_sal_meta_mch$type == "Child", ]
set.seed(12345);wilcox.test(ISR_sal_meta_mch_ch$age ~ ISR_sal_meta_mch_ch$status)$p.value # p-value = 0.2937314 (Not Significant)
## [1] 0.2937314
# For Mother-Child Comparisons
ISR_sal_mch <- ISR_sal[row.names(ISR_sal) %in% ISR_sal_meta_mch$sample,] # Using original ISR_sal_meta file to extract
ISR_sal_mch <- ISR_sal_mch[,colSums(ISR_sal_mch) > 0]
dim(ISR_sal_mch) # 206 1913
## [1] 206 1913
# AD_104_M is the Adopted mother of AD_212_C (Family 43)
# Renaming AD_104_M to AD_212_M
ISR_sal_meta_mch$sample <- gsub("AD_104_M", "AD_212_M", ISR_sal_meta_mch$sample)
# Also need to rename ISR_sal ASV table
# Reset name to match meta (104_M is the mother of 212_C)
row.names(ISR_sal_mch) <- gsub("AD_104_M", "AD_212_M", row.names(ISR_sal_mch))
ISR_sal_mch <- ISR_sal_mch[order(row.names(ISR_sal_mch)), ]
# Sanity check
all(row.names(ISR_sal_mch) == sort(ISR_sal_meta_mch$sample)) # TRUE
## [1] TRUE
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); ISR_sal_mch_rar <- as.data.frame(rrarefy(ISR_sal_mch, min(rowSums(ISR_sal_mch))))
# Convert Rarified counts to Pr/Ab:
ISR_sal_mch_prab <- data.frame((ISR_sal_mch_rar > 0)*1)
# Bio
ISR_sal_mch_prab_bio <- ISR_sal_mch_prab[row.names(ISR_sal_mch_prab) %in% ISR_sal_meta_mch$sample[ISR_sal_meta_mch$status == "Biological"], ]
ISR_sal_mch_prab_bio <- ISR_sal_mch_prab_bio[,colSums(ISR_sal_mch_prab_bio) > 0]
dim(ISR_sal_mch_prab_bio) # 108 1324
## [1] 108 1324
# Adp
ISR_sal_mch_prab_adp <- ISR_sal_mch_prab[row.names(ISR_sal_mch_prab) %in% ISR_sal_meta_mch$sample[ISR_sal_meta_mch$status == "Adopted"], ]
ISR_sal_mch_prab_adp <- ISR_sal_mch_prab_adp[,colSums(ISR_sal_mch_prab_adp) > 0]
dim(ISR_sal_mch_prab_adp) # 98 1296
## [1] 98 1296
# Compute distances and melt
ISR_sal_mch_prab_sfd <- AD.melt.dist.sf(ISR_sal_mch_prab, ISR_sal_meta_mch)$dmsf
# Edit group name
ISR_sal_mch_prab_sfd$status <- gsub("Adopted", "Adoptive", ISR_sal_mch_prab_sfd$status)
# Subset diff family for M-C only
ISR_sal_mch_prab_dfd_bio <- AD.melt.dist.sf(ISR_sal_mch_prab_bio, ISR_sal_meta_mch)$dmdf
ISR_sal_mch_prab_dfd_adp <- AD.melt.dist.sf(ISR_sal_mch_prab_adp, ISR_sal_meta_mch)$dmdf
# For M-C Only
# Bio
ISR_sal_mch_prab_dfd_bio$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_bio$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_bio$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_bio$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_bio_mc <- ISR_sal_mch_prab_dfd_bio[ISR_sal_mch_prab_dfd_bio$type1 != ISR_sal_mch_prab_dfd_bio$type2, ]
# Adp
ISR_sal_mch_prab_dfd_adp$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_adp$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_adp$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_adp$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_adp_mc <- ISR_sal_mch_prab_dfd_adp[ISR_sal_mch_prab_dfd_adp$type1 != ISR_sal_mch_prab_dfd_adp$type2, ]
# Remove type columns
ISR_sal_mch_prab_dfd_bio_mc <- ISR_sal_mch_prab_dfd_bio_mc[,c(1:7)]
ISR_sal_mch_prab_dfd_adp_mc <- ISR_sal_mch_prab_dfd_adp_mc[,c(1:7)]
# Rename status
ISR_sal_mch_prab_dfd_bio_mc$status <- gsub("Unrelated", "Unrelated Biological", ISR_sal_mch_prab_dfd_bio_mc$status)
ISR_sal_mch_prab_dfd_adp_mc$status <- gsub("Unrelated", "Unrelated Adoptive", ISR_sal_mch_prab_dfd_adp_mc$status)
# Combine for plotting
ISR_sal_mch_prab_sdfd <- rbind(ISR_sal_mch_prab_sfd, ISR_sal_mch_prab_dfd_bio_mc, ISR_sal_mch_prab_dfd_adp_mc)
ISR_sal_mch_prab_sdfd$status <- factor(ISR_sal_mch_prab_sdfd$status, levels=c("Biological", "Adoptive", "Unrelated Biological", "Unrelated Adoptive"))
# Calculate P value for bio/adp
ISR_sal_mch_prab_sfd_wilc <- wilcox_pval_star(ISR_sal_mch_prab_sfd$value, ISR_sal_mch_prab_sfd$status)$pval_star
# Calculate respective P values for bio unrelated
ISR_sal_mch_prab_bio_perm_pval_star <- perm_p_value(ISR_sal_mch_prab_bio,"bray")$pval_star
# Calculate respective P values for adp unrelated
ISR_sal_mch_prab_adp_perm_pval_star <- perm_p_value(ISR_sal_mch_prab_adp,"bray")$pval_star
ggplot(ISR_sal_mch_prab_sdfd, aes(x=status, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=status)) +
scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) + geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
geom_jitter(data=ISR_sal_mch_prab_sfd, alpha=0.75, size=0.3, width = 0.05, height = 0) + scale_x_discrete(labels=function(x){sub("\\s", "\n", x)}) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=1,xend=2,y=1.05,yend=1.05, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.05,yend=1.04,lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.05,yend=1.04,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_sfd_wilc, x = 1.5, y = 1.06, size=2) +
annotate(x=1,xend=3,y=1.10,yend=1.10, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.10,yend=1.09,lwd=0.15,geom="segment") + annotate(x=3,xend=3,y=1.10,yend=1.09,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_bio_perm_pval_star, x = 2, y = 1.11, size=2) +
annotate(x=2,xend=4,y=1.14,yend=1.14, lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.14,yend=1.13,lwd=0.15,geom="segment") + annotate(x=4,xend=4,y=1.14,yend=1.13,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_adp_perm_pval_star, x = 3, y = 1.15, size=2) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sal_mch_violin_jitter_final.pdf", width = 3.5, height = 5.2, units = "in")
# Add BC distances to ch metadata
ISR_sal_meta_mch_ch$dist <- ISR_sal_mch_prab_sfd$value[match(ISR_sal_meta_mch_ch$sample, ISR_sal_mch_prab_sfd$Var1)]
nrow(ISR_sal_meta_mch_ch)
## [1] 103
# Subset for mother
ISR_sal_meta_mch_mo <- ISR_sal_meta_mch[ISR_sal_meta_mch$type == "Mother", ]
nrow(ISR_sal_meta_mch_mo) # 103
## [1] 103
# Output
write.table(ISR_sal_meta_mch_mo, sep = "\t", quote = F, row.names = F, file="Intermed_files/ISR_sal_meta_mch_mo.txt")
write.table(ISR_sal_meta_mch_ch, sep = "\t", quote = F, row.names = F, file="Intermed_files/ISR_sal_meta_mch_ch.txt")
### Subset Metadata and Counts
# ISR Subgingival
ISR_sub_all <- ISR_seqtab_atab_comb_filtp_clean[grepl("P2", row.names(ISR_seqtab_atab_comb_filtp_clean)),]
ISR_sub <- ISR_sub_all[,colSums(ISR_sub_all) > 0]
dim(ISR_sub) # 255 3237
## [1] 255 3250
# Remove _P1 for matching with Meta
row.names(ISR_sub) <- gsub("_P2_ISR" , "", row.names(ISR_sub))
# For Mother-Child Comparisons
ISR_sub_mch_all <- ISR_sub[row.names(ISR_sub) %in% ISR_sal_meta_mch$sample,]
ISR_sub_mch_all <- ISR_sub_mch_all[,colSums(ISR_sub_mch_all) > 0]
dim(ISR_sub_mch_all) # 197 2860
## [1] 197 2869
# Subset meta for sup mch dataset
ISR_sub_meta_mch_all <- ISR_sal_meta_mch[ISR_sal_meta_mch$sample %in% row.names(ISR_sub_mch_all), ]
table(ISR_sub_meta_mch_all$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 46 46 52 53
# Adopted Child Adopted Mother Biological Child Biological Mother
# 46 46 52 53
# Remove unbalanced grps
ISR_sub_meta_mch <- ISR_sub_meta_mch_all[!(ISR_sub_meta_mch_all$family_id %in% names(which(table(ISR_sub_meta_mch_all$family_id) < 2))), ]
nrow(ISR_sub_meta_mch) # 190
## [1] 190
table(ISR_sub_meta_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 44 44 51 51
# Adopted Child Adopted Mother Biological Child Biological Mother
# 44 44 51 51
# Balanced
# Test age balance
ISR_sub_meta_mch_ch <- ISR_sub_meta_mch[ISR_sub_meta_mch$type == "Child", ]
set.seed(12345);wilcox.test(ISR_sub_meta_mch_ch$age ~ ISR_sub_meta_mch_ch$status)$p.value # p-value = 0.6436 (Not Significant)
## [1] 0.6435509
# Subset counts for final comparison
ISR_sub_mch <- ISR_sub_mch_all[row.names(ISR_sub_mch_all) %in% ISR_sub_meta_mch$sample, ]
ISR_sub_mch <- ISR_sub_mch[, colSums(ISR_sub_mch) > 0]
dim(ISR_sub_mch) # 190 x 2821
## [1] 190 2821
# Re-order
ISR_sub_mch <- ISR_sub_mch[order(row.names(ISR_sub_mch)), ]
# Sanity check
all(row.names(ISR_sub_mch) == ISR_sub_meta_mch$sample) # TRUE
## [1] TRUE
## [1] 102 2036
## [1] 88 1596
### Subset Metadata and Counts
# ISR Subgingival
ISR_sup_all <- ISR_seqtab_atab_comb_filtp_clean[grepl("P1", row.names(ISR_seqtab_atab_comb_filtp_clean)),]
ISR_sup <- ISR_sup_all[,colSums(ISR_sup_all) > 0]
dim(ISR_sup) # 261 2771
## [1] 261 2780
# Remove _P1 for matching with Meta
row.names(ISR_sup) <- gsub("_P1_ISR" , "", row.names(ISR_sup))
# For Mother-Child Comparisons
ISR_sup_mch_all <- ISR_sup[row.names(ISR_sup) %in% ISR_sal_meta_mch$sample,]
ISR_sup_mch_all <- ISR_sup_mch_all[,colSums(ISR_sup_mch_all) > 0]
dim(ISR_sup_mch_all) # 202 2363
## [1] 202 2369
# Subset meta for sup mch dataset
ISR_sup_meta_mch_all <- ISR_sal_meta_mch[ISR_sal_meta_mch$sample %in% row.names(ISR_sup_mch_all), ]
table(ISR_sup_meta_mch_all$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 47 48 53 54
# Adopted Child Adopted Mother Biological Child Biological Mother
# 47 48 53 54
# Remove unbalanced grps
ISR_sup_meta_mch <- ISR_sup_meta_mch_all[!(ISR_sup_meta_mch_all$family_id %in% names(which(table(ISR_sup_meta_mch_all$family_id) < 2))), ]
nrow(ISR_sup_meta_mch) # 198
## [1] 198
table(ISR_sup_meta_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 46 46 53 53
# Adopted Child Adopted Mother Biological Child Biological Mother
# 46 46 53 53
# Balanced
# Test age balance
ISR_sup_meta_mch_ch <- ISR_sup_meta_mch[ISR_sup_meta_mch$type == "Child", ]
set.seed(12345);wilcox.test(ISR_sup_meta_mch_ch$age ~ ISR_sup_meta_mch_ch$status)$p.value # p-value = 0.436127 (Not Significant)
## [1] 0.436127
# Subset counts for final comparison
ISR_sup_mch <- ISR_sup_mch_all[row.names(ISR_sup_mch_all) %in% ISR_sup_meta_mch$sample, ]
ISR_sup_mch <- ISR_sup_mch[, colSums(ISR_sup_mch) > 0]
dim(ISR_sup_mch) # 198 x 2330
## [1] 198 2336
# Re-order
ISR_sup_mch <- ISR_sup_mch[order(row.names(ISR_sup_mch)), ]
# Sanity check
all(row.names(ISR_sup_mch) == ISR_sup_meta_mch$sample) # TRUE
## [1] TRUE
## [1] 106 1546
## [1] 92 1383
# Metadata for 3 member (mother, father, child) families
ISR_sal_meta_bio_3fam <- read.table(file="Input_files/ISR_sal_meta_bio_3fam.txt", header = T, sep="\t")
dim(ISR_sal_meta_bio_3fam) # 66 x 24 (22 families of M, F, Ch)
## [1] 66 24
# Metadata for siblings
ISR_sal_meta_bio_sibs <- read.table(file="Input_files/ISR_sal_meta_bio_sibs.txt", header = T, sep="\t")
dim(ISR_sal_meta_bio_sibs) # 32 x 24 (16 pairs of siblings)
## [1] 32 24
# Metadata for 3 member (mother, father, child) families
ISR_sal_meta_bio_cpls <- read.table(file="Input_files/ISR_sal_meta_bio_cpls.txt", header = T, sep="\t")
dim(ISR_sal_meta_bio_cpls) # 44 x 24 (22 couples)
## [1] 44 24
# 3 Fam Comparisons
# a. M-ch vs F-ch, P-ch vs U-ch, Couples vs U-adults
# For Family Comparisons
ISR_sal_3fam <- ISR_sal[row.names(ISR_sal) %in% ISR_sal_meta_bio_3fam$sample,]
ISR_sal_3fam <- ISR_sal_3fam[,colSums(ISR_sal_3fam) > 0]
dim(ISR_sal_3fam) # 66 x 946
## [1] 66 949
# sanity check
all(row.names(ISR_sal_3fam) == ISR_sal_meta_bio_3fam$sample) # TRUE
## [1] TRUE
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); ISR_sal_3fam_rar <- as.data.frame(rrarefy(ISR_sal_3fam, min(rowSums(ISR_sal_3fam)))) # min(rowSums(ISR_sal_3fam)) = 25,923
# Convert Rarified counts to Pr/Ab:
ISR_sal_3fam_prab <- data.frame((ISR_sal_3fam_rar > 0)*1)
# Compute distances and melt
ISR_sal_3fam_prab_sfd <- AD.melt.dist.sf(ISR_sal_3fam_prab, ISR_sal_meta_bio_3fam)$dmsf
ISR_sal_3fam_prab_dfd <- AD.melt.dist.sf(ISR_sal_3fam_prab, ISR_sal_meta_bio_3fam)$dmdf
# Add subject type info
ISR_sal_3fam_prab_sfd$sub_type1 <- ISR_sal_meta_bio_3fam$subject_type[match(ISR_sal_3fam_prab_sfd$Var1, ISR_sal_meta_bio_3fam$sample)]
ISR_sal_3fam_prab_sfd$sub_type2 <- ISR_sal_meta_bio_3fam$subject_type[match(ISR_sal_3fam_prab_sfd$Var2, ISR_sal_meta_bio_3fam$sample)]
# Remove "Biological "
ISR_sal_3fam_prab_sfd$sub_type1 <- gsub("Biological ", "", ISR_sal_3fam_prab_sfd$sub_type1)
ISR_sal_3fam_prab_sfd$sub_type2 <- gsub("Biological ", "", ISR_sal_3fam_prab_sfd$sub_type2)
# Combine sub_type to define relationship
ISR_sal_3fam_prab_sfd$grp <- paste(ISR_sal_3fam_prab_sfd$sub_type1, ISR_sal_3fam_prab_sfd$sub_type2, sep="-")
# For Child-Father vs Child-Mother comparison
# Child-Father and Child-Mother
ISR_sal_3fam_prab_sfd_cfm <- ISR_sal_3fam_prab_sfd[ISR_sal_3fam_prab_sfd$grp == "Child-Father" | ISR_sal_3fam_prab_sfd$grp == "Child-Mother", ]
ISR_sal_3fam_prab_sfd_cfm$sub_type1 <- NULL
ISR_sal_3fam_prab_sfd_cfm$sub_type2 <- NULL
nrow(ISR_sal_3fam_prab_sfd_cfm) # 44
## [1] 44
unique(ISR_sal_3fam_prab_sfd_cfm$grp)
## [1] "Child-Father" "Child-Mother"
# "Child-Father" "Child-Mother"
# For diff fam:
# Add subject type info
ISR_sal_3fam_prab_dfd$sub_type1 <- ISR_sal_meta_bio_3fam$subject_type[match(ISR_sal_3fam_prab_dfd$Var1, ISR_sal_meta_bio_3fam$sample)]
ISR_sal_3fam_prab_dfd$sub_type2 <- ISR_sal_meta_bio_3fam$subject_type[match(ISR_sal_3fam_prab_dfd$Var2, ISR_sal_meta_bio_3fam$sample)]
# Remove "Biological "
ISR_sal_3fam_prab_dfd$sub_type1 <- gsub("Biological ", "", ISR_sal_3fam_prab_dfd$sub_type1)
ISR_sal_3fam_prab_dfd$sub_type2 <- gsub("Biological ", "", ISR_sal_3fam_prab_dfd$sub_type2)
# Combine sub_type to define relationship
ISR_sal_3fam_prab_dfd$grp <- paste(ISR_sal_3fam_prab_dfd$sub_type1, ISR_sal_3fam_prab_dfd$sub_type2, sep="-")
# For unrelated child-father
ISR_sal_3fam_prab_dfd_ucf <- ISR_sal_3fam_prab_dfd[ISR_sal_3fam_prab_dfd$grp == "Child-Father" | ISR_sal_3fam_prab_dfd$grp == "Father-Child", ]
ISR_sal_3fam_prab_dfd_ucf$sub_type1 <- NULL
ISR_sal_3fam_prab_dfd_ucf$sub_type2 <- NULL
#View(ISR_sal_3fam_prab_dfd_ucf)
# Rename grp
ISR_sal_3fam_prab_dfd_ucf$grp <- gsub("Father-Child", "Child-Father", ISR_sal_3fam_prab_dfd_ucf$grp)
ISR_sal_3fam_prab_dfd_ucf$grp <- gsub("Child-Father", "Unrelated Child-Father", ISR_sal_3fam_prab_dfd_ucf$grp)
unique(ISR_sal_3fam_prab_dfd_ucf$grp) # "Unrelated Child-Father"
## [1] "Unrelated Child-Father"
nrow(ISR_sal_3fam_prab_dfd_ucf) # 462
## [1] 462
# Ch_F vs unrel_ad_ch
# Subset meta
ISR_sal_meta_bio_3fam_ch_f <- ISR_sal_meta_bio_3fam[ISR_sal_meta_bio_3fam$sub_type != "Biological Mother", ]
nrow(ISR_sal_meta_bio_3fam_ch_f) # 44
## [1] 44
# Subset prabs
ISR_sal_3fam_prab_ch_f <- ISR_sal_3fam_prab[row.names(ISR_sal_3fam_prab) %in% ISR_sal_meta_bio_3fam_ch_f$sample,]
ISR_sal_3fam_prab_ch_f <- ISR_sal_3fam_prab_ch_f[,colSums(ISR_sal_3fam_prab_ch_f) > 0]
dim(ISR_sal_3fam_prab_ch_f) # 44 774
## [1] 44 774
# For unrelated child-Mother
ISR_sal_3fam_prab_dfd_ucm <- ISR_sal_3fam_prab_dfd[ISR_sal_3fam_prab_dfd$grp == "Child-Mother" | ISR_sal_3fam_prab_dfd$grp == "Mother-Child", ]
ISR_sal_3fam_prab_dfd_ucm$sub_type1 <- NULL
ISR_sal_3fam_prab_dfd_ucm$sub_type2 <- NULL
#View(ISR_sal_3fam_prab_dfd_ucm)
# Rename grp
ISR_sal_3fam_prab_dfd_ucm$grp <- gsub("Mother-Child", "Child-Mother", ISR_sal_3fam_prab_dfd_ucm$grp)
ISR_sal_3fam_prab_dfd_ucm$grp <- gsub("Child-Mother", "Unrelated Child-Mother", ISR_sal_3fam_prab_dfd_ucm$grp)
unique(ISR_sal_3fam_prab_dfd_ucm$grp) # "Unrelated Child-Mother"
## [1] "Unrelated Child-Mother"
nrow(ISR_sal_3fam_prab_dfd_ucm) # 462
## [1] 462
# ch_m vs unrel_ad_ch
# Subset meta
ISR_sal_meta_bio_3fam_ch_m <- ISR_sal_meta_bio_3fam[ISR_sal_meta_bio_3fam$sub_type != "Biological Father", ]
nrow(ISR_sal_meta_bio_3fam_ch_m) # 44
## [1] 44
# Subset prabs
ISR_sal_3fam_prab_ch_m <- ISR_sal_3fam_prab[row.names(ISR_sal_3fam_prab) %in% ISR_sal_meta_bio_3fam_ch_m$sample,]
ISR_sal_3fam_prab_ch_m <- ISR_sal_3fam_prab_ch_m[,colSums(ISR_sal_3fam_prab_ch_m) > 0]
dim(ISR_sal_3fam_prab_ch_m) # 44 740
## [1] 44 740
# Siblings comparisons
# Subset Counts
ISR_sal_sibs <- ISR_sal[row.names(ISR_sal) %in% ISR_sal_meta_bio_sibs$sample,]
ISR_sal_sibs <- ISR_sal_sibs[,colSums(ISR_sal_sibs) > 0]
dim(ISR_sal_sibs) # 32 532
## [1] 32 535
# sanity check
all(row.names(ISR_sal_sibs) == ISR_sal_meta_bio_sibs$sample) # TRUE
## [1] TRUE
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); ISR_sal_sibs_rar <- as.data.frame(rrarefy(ISR_sal_sibs, min(rowSums(ISR_sal_sibs))))
# Convert Rarified counts to Pr/Ab:
ISR_sal_sibs_prab <- data.frame((ISR_sal_sibs_rar > 0)*1)
# Compute distances and melt
ISR_sal_sibs_prab_sfd <- AD.melt.dist.sf(ISR_sal_sibs_prab, ISR_sal_meta_bio_sibs)$dmsf
ISR_sal_sibs_prab_sfd$grp <- "Siblings"
ISR_sal_sibs_prab_dfd <- AD.melt.dist.sf(ISR_sal_sibs_prab, ISR_sal_meta_bio_sibs)$dmdf
ISR_sal_sibs_prab_dfd$grp <- "Unrelated Children"
# Couples
# Subset Counts
ISR_sal_cpls <- ISR_sal[row.names(ISR_sal) %in% ISR_sal_meta_bio_cpls$sample,]
ISR_sal_cpls <- ISR_sal_cpls[,colSums(ISR_sal_cpls) > 0]
dim(ISR_sal_cpls) # 44 746
## [1] 44 749
# sanity check
all(row.names(ISR_sal_cpls) == ISR_sal_meta_bio_cpls$sample) # TRUE
## [1] TRUE
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); ISR_sal_cpls_rar <- as.data.frame(rrarefy(ISR_sal_cpls, min(rowSums(ISR_sal_cpls))))
# Convert Rarified counts to Pr/Ab:
ISR_sal_cpls_prab <- data.frame((ISR_sal_cpls_rar > 0)*1)
# Compute distances and melt
ISR_sal_cpls_prab_sfd <- AD.melt.dist.sf(ISR_sal_cpls_prab, ISR_sal_meta_bio_cpls)$dmsf
ISR_sal_cpls_prab_sfd$grp <- "Couples"
ISR_sal_cpls_prab_dfd <- AD.melt.dist.sf(ISR_sal_cpls_prab, ISR_sal_meta_bio_cpls)$dmdf
ISR_sal_cpls_prab_dfd$grp <- "Unrelated Adults"
# Add TR data (See ISR_sup_tr.R for Tech Rep data generation)
tr_prab_dist_melt_ss_sal <- read.table("Input_files/tr_prab_dist_melt_ss_sal.txt", header = T, sep = "\t")
tr_prab_dist_melt_ss_sal$grp <- tr_prab_dist_melt_ss_sal$rel
# Combine for plotting
ISR_sal_3fam_all <- rbind(ISR_sal_sibs_prab_sfd, ISR_sal_sibs_prab_dfd, ISR_sal_cpls_prab_sfd, ISR_sal_cpls_prab_dfd,
ISR_sal_3fam_prab_sfd_cfm, ISR_sal_3fam_prab_dfd_ucm, ISR_sal_3fam_prab_dfd_ucf)
# Subset
ISR_sal_3fam_all_plot <- ISR_sal_3fam_all[,c("value", "grp")]
ISR_sal_3fam_all_plot_tr <- rbind(ISR_sal_3fam_all_plot, tr_prab_dist_melt_ss_sal[,c("value", "grp")])
# Define levels (sorting by mean distance of grps, increasing order)
ISR_sal_3fam_all_plot_tr$grp <- factor(ISR_sal_3fam_all_plot_tr$grp, levels=levels(reorder(ISR_sal_3fam_all_plot_tr$grp, ISR_sal_3fam_all_plot_tr$value, median)))
levels(ISR_sal_3fam_all_plot_tr$grp)
## [1] "Tech. Replicates" "Siblings"
## [3] "Couples" "Child-Mother"
## [5] "Child-Father" "Unrelated Adults"
## [7] "Unrelated Children" "Unrelated Child-Mother"
## [9] "Unrelated Child-Father"
# "Tech. Replicates" "Siblings" "Couples" "Child-Mother" "Child-Father" "Unrelated Adults" "Unrelated Children" "Unrelated Child-Mother"
# "Unrelated Child-Father"
ISR_sal_3fam_all_plot_tr$grp <- factor(ISR_sal_3fam_all_plot_tr$grp, levels(ISR_sal_3fam_all_plot_tr$grp)[c(2:9,1)])
levels(ISR_sal_3fam_all_plot_tr$grp)
## [1] "Siblings" "Couples"
## [3] "Child-Mother" "Child-Father"
## [5] "Unrelated Adults" "Unrelated Children"
## [7] "Unrelated Child-Mother" "Unrelated Child-Father"
## [9] "Tech. Replicates"
# "Siblings" "Couples" "Child-Mother" "Child-Father" "Unrelated Adults" "Unrelated Children" "Unrelated Child-Mother" "Unrelated Child-Father"
# "Tech. Replicates"
# Define colors
ISR_sal_fam_dists_cols_TR <- c("#CD9600", "#F8766D", "#7CAE00", "#00BE67", "#B7A09E", "#B0A68A", "#7F8D5D", "#8AAD9D", "white")
# Swab ISR 3Fam Violin Plot with Jitters
ggplot(ISR_sal_3fam_all_plot_tr, aes(x=grp, y = value)) + geom_violin(alpha=0.9, width=0.95, lwd=0.1, aes(fill=grp), position = ) +
geom_boxplot(data=ISR_sal_3fam_all_plot_tr[ISR_sal_3fam_all_plot_tr$grp != "Tech. Replicates", ], alpha=0.2, width=0.1, lwd=0.2, outlier.shape = NA) +
scale_fill_manual(values=ISR_sal_fam_dists_cols_TR) +
geom_point(data=tr_prab_dist_melt_ss_sal, alpha=0.75, color="black", pch=21, stroke = 0.5, size=1,fill="white") + scale_x_discrete(labels=function(x){sub("\\s", "\n", x)}) +
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1.0), limits = c(0,1), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(title = "Soft Tissue\n", fill="Group", x = "", y = "Bray-Curtis Dissimilarity Index") + theme_classic() +
theme(plot.title = element_blank(), axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_Swab_extended_violin_cpl_chm_final.pdf", width = 9, height = 3.5, units = "in")
# Statistics Calculations
# Child-Father vs Child-Mother using Wilcox Test
wilcox_pval_star(ISR_sal_3fam_prab_sfd_cfm$value, ISR_sal_3fam_prab_sfd_cfm$grp)$pval_star # ns
## [1] "ns"
# Couples vs Siblings
ISR_sal_sfd_cpls_sibs <- rbind(ISR_sal_sibs_prab_sfd, ISR_sal_cpls_prab_sfd)
unique(ISR_sal_sfd_cpls_sibs$grp) # "Siblings" "Couples"
## [1] "Siblings" "Couples"
wilcox_pval_star(ISR_sal_sfd_cpls_sibs$value, ISR_sal_sfd_cpls_sibs$grp)$pval_star # ns
## [1] "ns"
# Couples vs Child-Mother
ISR_sal_sfd_cpls_cm <- rbind(ISR_sal_cpls_prab_sfd, ISR_sal_3fam_prab_sfd_cfm[ISR_sal_3fam_prab_sfd_cfm$grp == "Child-Mother", ])
unique(ISR_sal_sfd_cpls_cm$grp) # ""Couples" "Child-Mother"
## [1] "Couples" "Child-Mother"
wilcox_pval_star(ISR_sal_sfd_cpls_cm$value, ISR_sal_sfd_cpls_cm$grp)$pval_star # ***
## [1] "***"
# Couples vs Child-Father
ISR_sal_sfd_cpls_cf <- rbind(ISR_sal_cpls_prab_sfd, ISR_sal_3fam_prab_sfd_cfm[ISR_sal_3fam_prab_sfd_cfm$grp == "Child-Father", ])
unique(ISR_sal_sfd_cpls_cf$grp) # "Couples" "Child-Father"
## [1] "Couples" "Child-Father"
wilcox_pval_star(ISR_sal_sfd_cpls_cf$value, ISR_sal_sfd_cpls_cf$grp)$pval_star # ***
## [1] "***"
# Siblings vs Child-Mother
ISR_sal_sfd_sibs_cm <- rbind(ISR_sal_sibs_prab_sfd, ISR_sal_3fam_prab_sfd_cfm[ISR_sal_3fam_prab_sfd_cfm$grp == "Child-Mother", ])
unique(ISR_sal_sfd_sibs_cm$grp) # "Siblings" "Child-Mother"
## [1] "Siblings" "Child-Mother"
wilcox_pval_star(ISR_sal_sfd_sibs_cm$value, ISR_sal_sfd_sibs_cm$grp)$pval_star # *
## [1] "*"
# Siblings vs Child-Father
ISR_sal_sfd_sibs_cf <- rbind(ISR_sal_sibs_prab_sfd, ISR_sal_3fam_prab_sfd_cfm[ISR_sal_3fam_prab_sfd_cfm$grp == "Child-Father", ])
unique(ISR_sal_sfd_sibs_cf$grp) # "Siblings" "Child-Father"
## [1] "Siblings" "Child-Father"
wilcox_pval_star(ISR_sal_sfd_sibs_cf$value, ISR_sal_sfd_sibs_cf$grp)$pval_star # *
## [1] "*"
# Couples vs Unrelated Adults using Permutation test
perm_p_value(ISR_sal_cpls_prab, "bray")$pval_star # ***
## [1] "***"
# Sibs vs unrelated children using Permutation test
perm_p_value(ISR_sal_sibs_prab, "bray")$pval_star # ***
## [1] "***"
# Child-Father vs unrelated Child-Father using Permutation test
perm_p_value(ISR_sal_3fam_prab_ch_f, "bray")$pval_star # *
## [1] "*"
# Child-Mother vs unrelated Child-Mother using Permutation test
perm_p_value(ISR_sal_3fam_prab_ch_m, "bray")$pval_star # **
## [1] "**"
### Computing Mother-Child Distances
# Convert to percentage (relative abundance/levels)
ISR_sal_mch_pct <- decostand(ISR_sal_mch, method = "total")
dim(ISR_sal_mch_pct) # 206 1906
## [1] 206 1913
# Bio
ISR_sal_mch_pct_bio <- ISR_sal_mch_pct[row.names(ISR_sal_mch_pct) %in% ISR_sal_meta_mch$sample[ISR_sal_meta_mch$status == "Biological"], ]
ISR_sal_mch_pct_bio <- ISR_sal_mch_pct_bio[,colSums(ISR_sal_mch_pct_bio) > 0]
dim(ISR_sal_mch_pct_bio) # 108 1340
## [1] 108 1345
# Adp
ISR_sal_mch_pct_adp <- ISR_sal_mch_pct[row.names(ISR_sal_mch_pct) %in% ISR_sal_meta_mch$sample[ISR_sal_meta_mch$status == "Adopted"], ]
ISR_sal_mch_pct_adp <- ISR_sal_mch_pct_adp[,colSums(ISR_sal_mch_pct_adp) > 0]
dim(ISR_sal_mch_pct_adp) # 98 1308
## [1] 98 1310
# Compute distances and melt
ISR_sal_mch_pct_sfd <- AD.melt.dist.sf(ISR_sal_mch_pct, ISR_sal_meta_mch)$dmsf
# Edit group name
ISR_sal_mch_pct_sfd$status <- gsub("Adopted", "Adoptive", ISR_sal_mch_pct_sfd$status)
# Subset diff family for M-C only
ISR_sal_mch_pct_dfd_bio <- AD.melt.dist.sf(ISR_sal_mch_pct_bio, ISR_sal_meta_mch)$dmdf
ISR_sal_mch_pct_dfd_adp <- AD.melt.dist.sf(ISR_sal_mch_pct_adp, ISR_sal_meta_mch)$dmdf
# For M-C Only
# Bio
ISR_sal_mch_pct_dfd_bio$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_pct_dfd_bio$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_pct_dfd_bio$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_pct_dfd_bio$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_pct_dfd_bio_mc <- ISR_sal_mch_pct_dfd_bio[ISR_sal_mch_pct_dfd_bio$type1 != ISR_sal_mch_pct_dfd_bio$type2, ]
# Adp
ISR_sal_mch_pct_dfd_adp$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_pct_dfd_adp$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_pct_dfd_adp$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_pct_dfd_adp$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_pct_dfd_adp_mc <- ISR_sal_mch_pct_dfd_adp[ISR_sal_mch_pct_dfd_adp$type1 != ISR_sal_mch_pct_dfd_adp$type2, ]
# Remove type columns
ISR_sal_mch_pct_dfd_bio_mc <- ISR_sal_mch_pct_dfd_bio_mc[,c(1:7)]
ISR_sal_mch_pct_dfd_adp_mc <- ISR_sal_mch_pct_dfd_adp_mc[,c(1:7)]
# Rename status
ISR_sal_mch_pct_dfd_bio_mc$status <- gsub("Unrelated", "Unrelated Biological", ISR_sal_mch_pct_dfd_bio_mc$status)
ISR_sal_mch_pct_dfd_adp_mc$status <- gsub("Unrelated", "Unrelated Adoptive", ISR_sal_mch_pct_dfd_adp_mc$status)
# Combine for plotting
ISR_sal_mch_pct_sdfd <- rbind(ISR_sal_mch_pct_sfd, ISR_sal_mch_pct_dfd_bio_mc, ISR_sal_mch_pct_dfd_adp_mc)
ISR_sal_mch_pct_sdfd$status <- factor(ISR_sal_mch_pct_sdfd$status, levels=c("Biological", "Adoptive", "Unrelated Biological", "Unrelated Adoptive"))
# Calculate P value for bio/adp
ISR_sal_mch_pct_sfd_wilc <- wilcox_pval_star(ISR_sal_mch_pct_sfd$value, ISR_sal_mch_pct_sfd$status)$pval_star
# Calculate respective P values for bio unrelated
ISR_sal_mch_pct_bio_perm_pval_star <- perm_p_value(ISR_sal_mch_pct_bio,"bray")$pval_star
# Calculate respective P values for adp unrelated
ISR_sal_mch_pct_adp_perm_pval_star <- perm_p_value(ISR_sal_mch_pct_adp,"bray")$pval_star
# Compute distances and melt
ISR_sal_mch_prab_sfd_jc <- AD.melt.dist.sf(ISR_sal_mch_prab, ISR_sal_meta_mch)$jmsf
# Edit group name
ISR_sal_mch_prab_sfd_jc$status <- gsub("Adopted", "Adoptive", ISR_sal_mch_prab_sfd_jc$status)
# Subset diff family for M-C only
ISR_sal_mch_prab_dfd_bio <- AD.melt.dist.sf(ISR_sal_mch_prab_bio, ISR_sal_meta_mch)$dmdf
ISR_sal_mch_prab_dfd_adp <- AD.melt.dist.sf(ISR_sal_mch_prab_adp, ISR_sal_meta_mch)$dmdf
# For M-C Only
# Bio
ISR_sal_mch_prab_dfd_bio$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_bio$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_bio$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_bio$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_bio_mc <- ISR_sal_mch_prab_dfd_bio[ISR_sal_mch_prab_dfd_bio$type1 != ISR_sal_mch_prab_dfd_bio$type2, ]
# Adp
ISR_sal_mch_prab_dfd_adp$type1 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_adp$Var1, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_adp$type2 <- ISR_sal_meta_mch$type[match(ISR_sal_mch_prab_dfd_adp$Var2, ISR_sal_meta_mch$sample)]
ISR_sal_mch_prab_dfd_adp_mc <- ISR_sal_mch_prab_dfd_adp[ISR_sal_mch_prab_dfd_adp$type1 != ISR_sal_mch_prab_dfd_adp$type2, ]
# Remove type columns
ISR_sal_mch_prab_dfd_bio_mc <- ISR_sal_mch_prab_dfd_bio_mc[,c(1:7)]
ISR_sal_mch_prab_dfd_adp_mc <- ISR_sal_mch_prab_dfd_adp_mc[,c(1:7)]
# Rename status
ISR_sal_mch_prab_dfd_bio_mc$status <- gsub("Unrelated", "Unrelated Biological", ISR_sal_mch_prab_dfd_bio_mc$status)
ISR_sal_mch_prab_dfd_adp_mc$status <- gsub("Unrelated", "Unrelated Adoptive", ISR_sal_mch_prab_dfd_adp_mc$status)
# Combine for plotting
ISR_sal_mch_prab_sdfd_jc <- rbind(ISR_sal_mch_prab_sfd_jc, ISR_sal_mch_prab_dfd_bio_mc, ISR_sal_mch_prab_dfd_adp_mc)
ISR_sal_mch_prab_sdfd_jc$status <- factor(ISR_sal_mch_prab_sdfd_jc$status, levels=c("Biological", "Adoptive", "Unrelated Biological", "Unrelated Adoptive"))
# Calculate P value for bio/adp
ISR_sal_mch_prab_sfd_jc_wilc <- wilcox_pval_star(ISR_sal_mch_prab_sfd_jc$value, ISR_sal_mch_prab_sfd_jc$status)$pval_star
# Calculate respective P values for bio unrelated
ISR_sal_mch_prab_bio_perm_pval_star_jc <- perm_p_value(ISR_sal_mch_prab_bio,"jaccard")$pval_star
# Calculate respective P values for adp unrelated
ISR_sal_mch_prab_adp_perm_pval_star_jc <- perm_p_value(ISR_sal_mch_prab_adp,"jaccard")$pval_star
ggplot(ISR_sal_mch_prab_sdfd_jc, aes(x=status, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=status)) +
scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) + geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
geom_jitter(data=ISR_sal_mch_prab_sfd_jc, alpha=0.75, size=0.3, width = 0.05, height = 0) + scale_x_discrete(labels=function(x){sub("\\s", "\n", x)}) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=1,xend=2,y=1.05,yend=1.05, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.05,yend=1.04,lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.05,yend=1.04,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_sfd_jc_wilc, x = 1.5, y = 1.06, size=2) +
annotate(x=1,xend=3,y=1.10,yend=1.10, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.10,yend=1.09,lwd=0.15,geom="segment") + annotate(x=3,xend=3,y=1.10,yend=1.09,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_bio_perm_pval_star_jc, x = 2, y = 1.11, size=2) +
annotate(x=2,xend=4,y=1.14,yend=1.14, lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.14,yend=1.13,lwd=0.15,geom="segment") + annotate(x=4,xend=4,y=1.14,yend=1.13,lwd=0.15,geom="segment") +
annotate("text", label = ISR_sal_mch_prab_adp_perm_pval_star_jc, x = 3, y = 1.15, size=2) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Jaccard)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sal_mch_violin_jitter_jaccard.pdf", width = 3.5, height = 5.2, units = "in")
# Load ISR db tax file
ISR_db_tax <- read.table(file="Input_files/ISR_db_oct2018_tax.txt", header=T, sep="\t")
# Load BLAST results
ISR_blastres_sal <- read.table(file="Input_files/ISR_sal_blast_top2.txt", header=F, sep="\t", row.names = 1)
dim(ISR_blastres_sal) # 2094 x 6 (Includes ALL ASVs)
## [1] 2094 6
colnames(ISR_blastres_sal) <- c("db1","score1","m_size1","db2","score2","m_size2")
# Adding OTUs
ISR_blastres_sal$otu1 <- ISR_db_tax$CORE_otu[match(ISR_blastres_sal$db1,ISR_db_tax$db_id)]
ISR_blastres_sal$otu2 <- ISR_db_tax$CORE_otu[match(ISR_blastres_sal$db2,ISR_db_tax$db_id)]
ISR_blastres_sal$genus <- ISR_db_tax$genus[match(ISR_blastres_sal$db2,ISR_db_tax$db_id)]
# Subset for BLAST score >= 92%
ISR_blast_matches_sal_92 <- ISR_blastres_sal[ISR_blastres_sal$score1 >= 0.92,]
nrow(ISR_blast_matches_sal_92)/nrow(ISR_blastres_sal)*100 # 69.1% over 92% matched
## [1] 69.1022
ISR_blast_matches_sal <- ISR_blast_matches_sal_92
# Subset counts to include those ASVs matched at 90% or more with ISRdb
ISR_sal_matched <- ISR_sal[,colnames(ISR_sal) %in% row.names(ISR_blast_matches_sal)]
dim(ISR_sal_matched) # 262 1447
## [1] 262 1447
# Making strain table
strain_table_sal <- data.frame(sort(table(droplevels(ISR_blast_matches_sal$otu1)), decreasing = T), check.names = F)
colnames(strain_table_sal) <- c("spOTU","ISR_types")
dim(strain_table_sal) # 73 x 2 (73 species)
## [1] 73 2
# Melt as matrix and add OTU info
AD_ISR_sal_m <- melt(as.matrix(ISR_sal_matched))
AD_ISR_sal_m$otu <- ISR_blast_matches_sal$otu1[match(AD_ISR_sal_m$Var2, row.names(ISR_blast_matches_sal))]
colnames(AD_ISR_sal_m) <- c("Sample", "ASV", "Counts", "OTU")
# Reshape
AD_ISR_sal_m_otu <- dcast(AD_ISR_sal_m, Sample ~ OTU, value.var = "Counts", fun.aggregate = sum)
row.names(AD_ISR_sal_m_otu) <- AD_ISR_sal_m_otu$Sample
AD_ISR_sal_m_otu$Sample <- NULL
dim(AD_ISR_sal_m_otu) # 262 x 73
## [1] 262 73
# Add count data to Strain table
strain_table_sal$Counts <- colSums(AD_ISR_sal_m_otu)[match(strain_table_sal$spOTU, colnames(AD_ISR_sal_m_otu))]
# Prevalence
AD_ISR_sal_m_otu_prabs <- data.frame((AD_ISR_sal_m_otu > 0)*1, check.names = F)
# Add prevalence to strain table
strain_table_sal$Prevalence <- colSums(AD_ISR_sal_m_otu_prabs)[match(strain_table_sal$spOTU, colnames(AD_ISR_sal_m_otu_prabs))]
strain_table_sal$prev_perc <- round(strain_table_sal$Prevalence/nrow(AD_ISR_sal_m_otu_prabs)*100,2)
# Percentage of total
strain_table_sal$per_tot <- round(strain_table_sal$Counts/sum(strain_table_sal$Counts)*100,2)
# View table
head(strain_table_sal)
## spOTU ISR_types Counts
## 1 Rothia mucilaginosa 200 1184568
## 2 Granulicatella adiacens 197 277379
## 3 Streptococcus mitis pneumoniae infantis oralis 179 3385120
## 4 Moraxella osloensis 109 38024
## 5 Veillonella atypica dispar parvula 102 39609
## 6 Streptococcus parasanguinis 85 327195
## Prevalence prev_perc per_tot
## 1 259 98.85 19.11
## 2 259 98.85 4.47
## 3 262 100.00 54.61
## 4 119 45.42 0.61
## 5 218 83.21 0.64
## 6 250 95.42 5.28
# top 10 most abundant genera
strain_table_sal[order(strain_table_sal$per_tot, decreasing = T),]$spOTU[1:10]
## [1] Streptococcus mitis pneumoniae infantis oralis
## [2] Rothia mucilaginosa
## [3] Streptococcus vestibularis salivarius
## [4] Streptococcus parasanguinis
## [5] Granulicatella adiacens
## [6] Streptococcus australis
## [7] Streptococcus sanguinis
## [8] Gemella sanguinis
## [9] Atopobium parvulum
## [10] Veillonella atypica dispar parvula
## 73 Levels: Rothia mucilaginosa ... Staphylococcus epidermidis
# [1] Streptococcus mitis pneumoniae infantis oralis Rothia mucilaginosa Streptococcus vestibularis salivarius Streptococcus parasanguinis
# [5] Granulicatella adiacens Streptococcus australis Streptococcus sanguinis Gemella sanguinis
# [9] Atopobium parvulum Veillonella atypica dispar parvula
# Order by per of total abundance
strain_table_sal <- strain_table_sal[order(strain_table_sal$per_tot, decreasing = T),]
# Write table
write.table(strain_table_sal, file="output/AD ISR Sal Strain Table.txt", sep = "\t", quote = F, row.names = F)
# For mother-child comparisons (used in species_dist_calc_sal function)
ISR_sal_matched_mch <- ISR_sal_matched[row.names(ISR_sal_matched) %in% row.names(ISR_sal_mch),]
ISR_sal_matched_mch <- ISR_sal_matched_mch[,colSums(ISR_sal_matched_mch) > 0]
dim(ISR_sal_matched_mch) # 205 1344
## [1] 205 1344
# Compute distances for each species (10 most abundant in the saliva dataset)
mitis_grp_species_sal_prab_sfd <- species_dist_calc_sal("Streptococcus mitis pneumoniae infantis oralis")
Rothia_m_species_sal_prab_sfd <- species_dist_calc_sal("Rothia mucilaginosa")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Strep_vs_species_sal_prab_sfd <- species_dist_calc_sal("Streptococcus vestibularis salivarius")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Strep_para_species_sal_prab_sfd <- species_dist_calc_sal("Streptococcus parasanguinis")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Gran_ad_species_sal_prab_sfd <- species_dist_calc_sal("Granulicatella adiacens")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Strep_aus_species_sal_prab_sfd <- species_dist_calc_sal("Streptococcus australis")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Strep_sang_species_sal_prab_sfd <- species_dist_calc_sal("Streptococcus sanguinis")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Gem_sang_species_sal_prab_sfd <- species_dist_calc_sal("Gemella sanguinis")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Ato_par_species_sal_prab_sfd <- species_dist_calc_sal("Atopobium parvulum")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
Veillonella_adp_sal_prab_sfd <- species_dist_calc_sal("Veillonella atypica dispar parvula")
## Warning in vegdist(asv_table, method = "bray"): you have empty rows: their
## dissimilarities may be meaningless in method "bray"
## Warning in vegdist(asv_table, method = "bray"): missing values in results
## Warning in vegdist(asv_table, method = "jaccard"): you have empty rows:
## their dissimilarities may be meaningless in method "jaccard"
## Warning in vegdist(asv_table, method = "jaccard"): missing values in
## results
# For combined visualization
# Combined Distances
Combined_sp_dists_sal <- rbind(mitis_grp_species_sal_prab_sfd, Rothia_m_species_sal_prab_sfd, Strep_para_species_sal_prab_sfd, Strep_vs_species_sal_prab_sfd,
Gran_ad_species_sal_prab_sfd, Strep_aus_species_sal_prab_sfd, Strep_sang_species_sal_prab_sfd,
Gem_sang_species_sal_prab_sfd, Ato_par_species_sal_prab_sfd, Veillonella_adp_sal_prab_sfd)
# Set factors
Combined_sp_dists_sal$otu <- factor(Combined_sp_dists_sal$otu, levels=strain_table_sal$spOTU[order(strain_table_sal$per_tot, decreasing = T)][1:10])
# Rename factor levels
levels(Combined_sp_dists_sal$otu) <- gsub("Streptococcus mitis pneumoniae infantis oralis", "Streptococcus mitis Group", levels(Combined_sp_dists_sal$otu))
levels(Combined_sp_dists_sal$otu) <- gsub("Streptococcus vestibularis salivarius", "Streptococcus salivarius", levels(Combined_sp_dists_sal$otu))
levels(Combined_sp_dists_sal$otu) <- gsub("Veillonella atypica dispar parvula", "Veillonella atypica Group", levels(Combined_sp_dists_sal$otu))
# Visualization of 10 Species plots (Violin with labeller)
boxp_sal_strains_poster <- ggplot(na.omit(Combined_sp_dists_sal), aes(y=value)) + facet_wrap(~otu, nrow = 2, labeller = label_wrap_gen(width=30)) +
geom_violin(alpha=0.9,aes(x=status, fill=otu), width = 1, lwd=0.75, draw_quantiles = c(0.5)) + scale_fill_brewer(palette = "Set3") +
annotate(x=0.4, xend=0.4, y=0.0, yend=1, lwd=0.4, geom="segment") +
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1), limits = c(0,1.05)) +
stat_compare_means(aes(x=status), label= "p.format", label.y = 1.04, label.x.npc = 0.6, size=3) +
labs(fill="Group", x = "", y = "Bray-Curtis Dissimilarity Index") + theme_classic() +
theme(plot.title = element_text(size=12, face="bold", hjust=0.5), plot.subtitle = element_text(color="firebrick2", size=10, hjust=0.5),
axis.text.x = element_text(size=14, face="bold", color="black"), axis.text.y = element_text(size=10, color="black"), axis.ticks.x = element_blank(),
axis.line = element_blank(), axis.title.y = element_blank(), strip.background = element_rect(color="black", size=0.5),
strip.text.x = element_text(size=12, face="bold.italic", margin = unit(c(0.1,0,0.1,0), "cm")), plot.margin = unit(c(0,0.1,0,0), "cm"), panel.spacing = unit(2, "lines"),
legend.title = element_text(size=9, face="bold", hjust=0.5), legend.text = element_text(size = 7), legend.key.size = unit(0.4,"cm"), legend.position = "none")
# Build ggplot object
boxp_sal_strains_poster_build <- ggplot_build(boxp_sal_strains_poster)
# Replace with adjusted p-value
boxp_sal_strains_poster_build$data[[3]]$label <- paste0("q = ", signif(p.adjust(boxp_sal_strains_poster_build$data[[3]]$p, method = "BH"),2))
# Plot
plot(ggplot_gtable(boxp_sal_strains_poster_build))
# Save
#ggsave(file="output/ISR swab species comp status.pdf", ggplot_gtable(boxp_sal_strains_poster_build), width = 15, height = 10, units = "in")
AD_core_counts <- read.table(file="Input_files/AD_core_counts.txt", sep="\t", header = T)
dim(AD_core_counts) # 709 x 581
## [1] 709 581
# Create table for all core samples sequenced
all_site_samples_core <- data.frame(row.names(AD_core_counts))
colnames(all_site_samples_core) <- "samples"
all_site_samples_core$sample <- all_site_samples_core$samples
all_site_samples_core$sample <- gsub("_P1", "", all_site_samples_core$sample)
all_site_samples_core$sample <- gsub("_P2", "", all_site_samples_core$sample)
all_site_samples_core$sample <- gsub("_S", "", all_site_samples_core$sample)
# Add subject info
all_site_samples_core$type <- AD_clinical_meta_2019_ISR$type[match(all_site_samples_core$sample, AD_clinical_meta_2019_ISR$sample)]
# Add site info
all_site_samples_core$site <- as.character(all_site_samples_core$samples)
all_site_samples_core$site[grepl("P1", all_site_samples_core$site)] <- "Supragingival Plaque"
all_site_samples_core$site[grepl("P2", all_site_samples_core$site)] <- "Subgingival Plaque"
all_site_samples_core$site[grepl("_S", all_site_samples_core$site)] <- "Soft Tissue/Saliva Swab"
# Subset for mother and child only
all_site_samples_core_mch <- all_site_samples_core[all_site_samples_core$type == "Mother" | all_site_samples_core$type == "Child", ]
nrow(all_site_samples_core_mch) # 645
# Subset counts
core_all_sites_mch_counts <- AD_core_counts[row.names(AD_core_counts) %in% all_site_samples_core_mch$samples, ]
core_all_sites_mch_counts <- core_all_sites_mch_counts[,colSums(core_all_sites_mch_counts) > 0]
dim(core_all_sites_mch_counts) # 645 580
# Rarefy counts
set.seed(12345); core_all_sites_mch_counts_rar <- as.data.frame(rrarefy(core_all_sites_mch_counts, min(rowSums(core_all_sites_mch_counts))))
# Convert Rarified counts to Pr/Ab:
core_all_sites_mch_counts_prab <- data.frame((core_all_sites_mch_counts_rar > 0)*1)
# Compute NMDS
set.seed(12345); core_all_sites_mch_counts_prab.mds <- metaMDS(core_all_sites_mch_counts_prab, trymax = 200, autotransform = F, wascores = F)
# Making dataframe for plotting
core_all_sites_mch_counts_prab.mds.df <- data.frame(scores(core_all_sites_mch_counts_prab.mds, display = 'sites'))
# Add subject type info
core_all_sites_mch_counts_prab.mds.df$type <- all_site_samples_core$type[match(row.names(core_all_sites_mch_counts_prab.mds.df), all_site_samples_core$samples)]
core_all_sites_mch_counts_prab.mds.df$site <- all_site_samples_core$site[match(row.names(core_all_sites_mch_counts_prab.mds.df), all_site_samples_core$samples)]
# View(core_all_sites_mch_counts_prab.mds.df)
# Permanova
set.seed(12345); (core_all_sites_mch_mds_df.mch.perm <- adonis(formula = core_all_sites_mch_counts_prab ~ core_all_sites_mch_counts_prab.mds.df$type, strata = core_all_sites_mch_counts_prab.mds.df$site))
# 0.001 ***
set.seed(12345); (core_all_sites_mch_mds_df.site.perm <- adonis(formula = core_all_sites_mch_counts_prab ~ core_all_sites_mch_counts_prab.mds.df$site, strata = core_all_sites_mch_counts_prab.mds.df$type))
# 0.001 ***
# For S samples
core_sal <- AD_core_counts[grepl("_S", row.names(AD_core_counts)),]
core_sal <- core_sal[,colSums(core_sal) > 0]
dim(core_sal) # 240 469
## [1] 240 469
# Remove "_S" from sample names to match meta
row.names(core_sal) <- gsub("_S", "", row.names(core_sal))
# For Mother-Child Comparisons
core_sal_mch_all <- core_sal[row.names(core_sal) %in% ISR_sal_meta_mch$sample,]
core_sal_mch_all <- core_sal_mch_all[,colSums(core_sal_mch_all) > 0]
dim(core_sal_mch_all) # 195 x 461
## [1] 195 461
# Subset meta for sal mch dataset
core_sal_meta_mch_all <- ISR_sal_meta_mch[ISR_sal_meta_mch$sample %in% row.names(core_sal_mch_all), ]
table(core_sal_meta_mch_all$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 47 47 48 53
# Adopted Child Adopted Mother Biological Child Biological Mother
# 47 47 48 53
# Remove unpaired samples:
# Remove samples with only 1 family member
core_sal_mch_1fam <- names(table(core_sal_meta_mch_all$family_id))[table(core_sal_meta_mch_all$family_id) < 2]
core_sal_meta_mch <- core_sal_meta_mch_all[!(core_sal_meta_mch_all$family_id %in% core_sal_mch_1fam),]
table(core_sal_meta_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 45 45 48 48
# Adopted Child Adopted Mother Biological Child Biological Mother
# 45 45 48 48
# Balanced
nrow(core_sal_meta_mch) # 186
## [1] 186
# Test age balance
core_sal_meta_mch_ch <- core_sal_meta_mch[core_sal_meta_mch$type == "Child", ]
wilcox.test(core_sal_meta_mch_ch$age ~ core_sal_meta_mch_ch$status, exact=F) # p-value = 0.3211 (Not Significant)
##
## Wilcoxon rank sum test with continuity correction
##
## data: core_sal_meta_mch_ch$age by core_sal_meta_mch_ch$status
## W = 952.5, p-value = 0.3289
## alternative hypothesis: true location shift is not equal to 0
# Subset counts for final comparison
core_sal_mch <- core_sal_mch_all[row.names(core_sal_mch_all) %in% core_sal_meta_mch$sample, ]
core_sal_mch <- core_sal_mch[, colSums(core_sal_mch) > 0]
dim(core_sal_mch) # 186 458
## [1] 186 458
# Re-order
core_sal_mch <- core_sal_mch[order(row.names(core_sal_mch)), ]
# sanity check
all(row.names(core_sal_mch) == core_sal_meta_mch$sample) # TRUE
## [1] TRUE
#### Computing Mother-Child Distances
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); core_sal_mch_rar <- as.data.frame(rrarefy(core_sal_mch, min(rowSums(core_sal_mch))))
# Convert Rarified counts to Pr/Ab:
core_sal_mch_prab <- data.frame((core_sal_mch_rar > 0)*1)
# Bio
core_sal_mch_prab_bio <- core_sal_mch_prab[row.names(core_sal_mch_prab) %in% core_sal_meta_mch$sample[core_sal_meta_mch$status == "Biological"], ]
core_sal_mch_prab_bio <- core_sal_mch_prab_bio[,colSums(core_sal_mch_prab_bio) > 0]
dim(core_sal_mch_prab_bio) # 96 360
## [1] 96 360
# Adp
core_sal_mch_prab_adp <- core_sal_mch_prab[row.names(core_sal_mch_prab) %in% core_sal_meta_mch$sample[core_sal_meta_mch$status == "Adopted"], ]
core_sal_mch_prab_adp <- core_sal_mch_prab_adp[,colSums(core_sal_mch_prab_adp) > 0]
dim(core_sal_mch_prab_adp) # 90 368
## [1] 90 368
# Compute distances and melt
core_sal_mch_prab_sfd <- AD.melt.dist.sf(core_sal_mch_prab, core_sal_meta_mch)$dmsf
# Edit group name
core_sal_mch_prab_sfd$status <- gsub("Adopted", "Adoptive", core_sal_mch_prab_sfd$status)
# Subset diff family for M-C only
core_sal_mch_prab_dfd_bio <- AD.melt.dist.sf(core_sal_mch_prab_bio, core_sal_meta_mch)$dmdf
core_sal_mch_prab_dfd_adp <- AD.melt.dist.sf(core_sal_mch_prab_adp, core_sal_meta_mch)$dmdf
# For M-C Only
# Bio
core_sal_mch_prab_dfd_bio$type1 <- core_sal_meta_mch$type[match(core_sal_mch_prab_dfd_bio$Var1, core_sal_meta_mch$sample)]
core_sal_mch_prab_dfd_bio$type2 <- core_sal_meta_mch$type[match(core_sal_mch_prab_dfd_bio$Var2, core_sal_meta_mch$sample)]
core_sal_mch_prab_dfd_bio_mc <- core_sal_mch_prab_dfd_bio[core_sal_mch_prab_dfd_bio$type1 != core_sal_mch_prab_dfd_bio$type2, ]
# Adp
core_sal_mch_prab_dfd_adp$type1 <- core_sal_meta_mch$type[match(core_sal_mch_prab_dfd_adp$Var1, core_sal_meta_mch$sample)]
core_sal_mch_prab_dfd_adp$type2 <- core_sal_meta_mch$type[match(core_sal_mch_prab_dfd_adp$Var2, core_sal_meta_mch$sample)]
core_sal_mch_prab_dfd_adp_mc <- core_sal_mch_prab_dfd_adp[core_sal_mch_prab_dfd_adp$type1 != core_sal_mch_prab_dfd_adp$type2, ]
# Remove type columns
core_sal_mch_prab_dfd_bio_mc <- core_sal_mch_prab_dfd_bio_mc[,c(1:7)]
core_sal_mch_prab_dfd_adp_mc <- core_sal_mch_prab_dfd_adp_mc[,c(1:7)]
# Rename status
core_sal_mch_prab_dfd_bio_mc$status <- gsub("Unrelated", "Unrelated Biological", core_sal_mch_prab_dfd_bio_mc$status)
core_sal_mch_prab_dfd_adp_mc$status <- gsub("Unrelated", "Unrelated Adoptive", core_sal_mch_prab_dfd_adp_mc$status)
# Combine for plotting
core_sal_mch_prab_sdfd <- rbind(core_sal_mch_prab_sfd, core_sal_mch_prab_dfd_bio_mc, core_sal_mch_prab_dfd_adp_mc)
core_sal_mch_prab_sdfd$status <- factor(core_sal_mch_prab_sdfd$status, levels=c("Biological", "Adoptive", "Unrelated Biological", "Unrelated Adoptive"))
# Calculate P value for bio/adp
core_sal_mch_prab_sfd_wilc <- wilcox_pval_star(core_sal_mch_prab_sfd$value, core_sal_mch_prab_sfd$status)$pval_star
# Calculate respective P values for bio unrelated
core_sal_mch_prab_bio_perm_pval_star <- perm_p_value(core_sal_mch_prab_bio,"bray")$pval_star
# Calculate respective P values for adp unrelated
core_sal_mch_prab_adp_perm_pval_star <- perm_p_value(core_sal_mch_prab_adp,"bray")$pval_star
ggplot(core_sal_mch_prab_sdfd, aes(x=status, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=status)) +
scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) + geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
geom_jitter(data=core_sal_mch_prab_sfd, alpha=0.75, size=0.3, width = 0.05, height = 0) + scale_x_discrete(labels=function(x){sub("\\s", "\n", x)}) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=1,xend=2,y=1.05,yend=1.05, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.05,yend=1.04,lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.05,yend=1.04,lwd=0.15,geom="segment") +
annotate("text", label = core_sal_mch_prab_sfd_wilc, x = 1.5, y = 1.06, size=2) +
annotate(x=1,xend=3,y=1.10,yend=1.10, lwd=0.15,geom="segment") + annotate(x=1,xend=1,y=1.10,yend=1.09,lwd=0.15,geom="segment") + annotate(x=3,xend=3,y=1.10,yend=1.09,lwd=0.15,geom="segment") +
annotate("text", label = core_sal_mch_prab_bio_perm_pval_star, x = 2, y = 1.11, size=2) +
annotate(x=2,xend=4,y=1.14,yend=1.14, lwd=0.15,geom="segment") + annotate(x=2,xend=2,y=1.14,yend=1.13,lwd=0.15,geom="segment") + annotate(x=4,xend=4,y=1.14,yend=1.13,lwd=0.15,geom="segment") +
annotate("text", label = core_sal_mch_prab_adp_perm_pval_star, x = 3, y = 1.15, size=2) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/core_sal_mch_violin_jitter_final.pdf", width = 3.5, height = 5.2, units = "in")
# For P1 samples
core_sup <- AD_core_counts[grepl("_P1", row.names(AD_core_counts)),]
core_sup <- core_sup[,colSums(core_sup) > 0]
dim(core_sup) # 269 519
## [1] 241 514
# Remove "_S" from sample names to match meta
row.names(core_sup) <- gsub("_P1", "", row.names(core_sup))
# For Mother-Child Comparisons
core_sup_mch_all <- core_sup[row.names(core_sup) %in% ISR_sup_meta_mch$sample,]
core_sup_mch_all <- core_sup_mch_all[,colSums(core_sup_mch_all) > 0]
dim(core_sup_mch_all) # 192 503
## [1] 192 503
# Subset meta for sup mch dataset
core_sup_meta_mch_all <- ISR_sup_meta_mch[ISR_sup_meta_mch$sample %in% row.names(core_sup_mch_all), ]
table(core_sup_meta_mch_all$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 45 46 49 52
# Adopted Child Adopted Mother Biological Child Biological Mother
# 45 46 49 52
# Remove unpaired samples:
# Remove samples with only 1 family member
core_sup_mch_1fam <- names(table(core_sup_meta_mch_all$family_id))[table(core_sup_meta_mch_all$family_id) < 2]
core_sup_meta_mch <- core_sup_meta_mch_all[!(core_sup_meta_mch_all$family_id %in% core_sup_mch_1fam),]
table(core_sup_meta_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 45 45 48 48
# Adopted Child Adopted Mother Biological Child Biological Mother
# 45 45 48 48
# Balanced
nrow(core_sup_meta_mch) # 186
## [1] 186
# Test age balance
core_sup_meta_mch_ch <- core_sup_meta_mch[core_sup_meta_mch$type == "Child", ]
wilcox.test(core_sup_meta_mch_ch$age ~ core_sup_meta_mch_ch$status, exact=F) # p-value = 0.4216 (Not Significant)
##
## Wilcoxon rank sum test with continuity correction
##
## data: core_sup_meta_mch_ch$age by core_sup_meta_mch_ch$status
## W = 975.5, p-value = 0.424
## alternative hypothesis: true location shift is not equal to 0
# Subset counts for final comparison
core_sup_mch <- core_sup_mch_all[row.names(core_sup_mch_all) %in% core_sup_meta_mch$sample, ]
core_sup_mch <- core_sup_mch[, colSums(core_sup_mch) > 0]
dim(core_sup_mch) # 186 502
## [1] 186 502
# Re-order
core_sup_mch <- core_sup_mch[order(row.names(core_sup_mch)), ]
# sanity check
all(row.names(core_sup_mch) == core_sup_meta_mch$sample) # TRUE
## [1] TRUE
## [1] 96 330
## [1] 90 412
# For P2 samples
core_sub <- AD_core_counts[grepl("_P2", row.names(AD_core_counts)),]
core_sub <- core_sub[,colSums(core_sub) > 0]
dim(core_sub) # 228 544
## [1] 228 544
# Remove "_S" from sample names to match meta
row.names(core_sub) <- gsub("_P2", "", row.names(core_sub))
# For Mother-Child Comparisons
core_sub_mch_all <- core_sub[row.names(core_sub) %in% ISR_sub_meta_mch$sample,]
core_sub_mch_all <- core_sub_mch_all[,colSums(core_sub_mch_all) > 0]
dim(core_sub_mch_all) # 180 534
## [1] 180 534
# Subset meta for sup mch dataset
core_sub_meta_mch_all <- ISR_sub_meta_mch[ISR_sub_meta_mch$sample %in% row.names(core_sub_mch_all), ]
table(core_sub_meta_mch_all$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 42 42 47 49
# Adopted Child Adopted Mother Biological Child Biological Mother
# 42 42 47 49
# Remove unpaired samples:
# Remove samples with only 1 family member
core_sub_mch_1fam <- names(table(core_sub_meta_mch_all$family_id))[table(core_sub_meta_mch_all$family_id) < 2]
core_sub_meta_mch <- core_sub_meta_mch_all[!(core_sub_meta_mch_all$family_id %in% core_sub_mch_1fam),]
table(core_sub_meta_mch$sub_type)
##
## Adopted Child Adopted Mother Biological Child Biological Mother
## 40 40 46 46
# Adopted Child Adopted Mother Biological Child Biological Mother
# 40 40 46 46
# Balanced
nrow(core_sub_meta_mch) # 172
## [1] 172
# Test age balance
core_sub_meta_mch_ch <- core_sub_meta_mch[core_sub_meta_mch$type == "Child", ]
wilcox.test(core_sub_meta_mch_ch$age ~ core_sub_meta_mch_ch$status, exact=F) # p-value = 0.321 (Not Significant)
##
## Wilcoxon rank sum test with continuity correction
##
## data: core_sub_meta_mch_ch$age by core_sub_meta_mch_ch$status
## W = 806.5, p-value = 0.3279
## alternative hypothesis: true location shift is not equal to 0
# Subset counts for final comparison
core_sub_mch <- core_sub_mch_all[row.names(core_sub_mch_all) %in% core_sub_meta_mch$sample, ]
core_sub_mch <- core_sub_mch[, colSums(core_sub_mch) > 0]
dim(core_sub_mch) # 172 533
## [1] 172 533
# Re-order
core_sub_mch <- core_sub_mch[order(row.names(core_sub_mch)), ]
# sanity check
all(row.names(core_sub_mch) == core_sub_meta_mch$sample) # TRUE
## [1] TRUE
# Pr/Ab Analysis
# Rarefy counts
set.seed(12345); core_sub_mch_rar <- as.data.frame(rrarefy(core_sub_mch, min(rowSums(core_sub_mch))))
# Convert Rarified counts to Pr/Ab:
core_sub_mch_prab <- data.frame((core_sub_mch_rar > 0)*1)
## [1] 92 404
## [1] 80 456
#### For CORE Shared Strain Analysis
# Subset for mother
core_sal_meta_mch_mo <- core_sal_meta_mch[core_sal_meta_mch$type == "Mother", ]
nrow(core_sal_meta_mch_mo) # 93
# Subset prabs for mo and ch
core_sal_mch_prab_mo <- core_sal_mch_prab[rownames(core_sal_mch_prab) %in% core_sal_meta_mch_mo$sample, ]
core_sal_mch_prab_ch <- core_sal_mch_prab[rownames(core_sal_mch_prab) %in% core_sal_meta_mch_ch$sample, ]
write.table(core_sal_mch_prab_mo, file="output/core_sal_mch_prab_mo.txt", sep="\t")
write.table(core_sal_mch_prab_ch, file="output/core_sal_mch_prab_ch.txt", sep="\t")
# Bio
dim(ISR_sal_mch_prab_bio) # 108 1324
## [1] 108 1324
ISR_sal_meta_mch_bio <- ISR_sal_meta_mch[ISR_sal_meta_mch$status == "Biological", ]
dim(ISR_sal_meta_mch_bio) # 108 x 24 (54 pairs)
## [1] 108 24
# Compute distances and melt
ISR_sal_mch_prab_bio_sfd <- AD.melt.dist.sf(ISR_sal_mch_prab_bio, ISR_sal_meta_mch_bio)$dmsf
View(ISR_sal_mch_prab_bio_sfd)
ISR_sal_mch_prab_bio_sfd$feed_mode <- ISR_sal_meta_mch_bio$feed_mode[match(ISR_sal_mch_prab_bio_sfd$Var1, ISR_sal_meta_mch_bio$sample)]
ISR_sal_mch_prab_bio_sfd$del_mode <- ISR_sal_meta_mch_bio$del_mode[match(ISR_sal_mch_prab_bio_sfd$Var1, ISR_sal_meta_mch_bio$sample)]
ggplot(ISR_sal_mch_prab_bio_sfd, aes(x=feed_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=feed_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sal_mch_feedmode.pdf", width = 3.5, height = 5.2, units = "in")
ggplot(ISR_sal_mch_prab_bio_sfd, aes(x=del_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=del_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sal_mch_delmode.pdf", width = 3.5, height = 5.2, units = "in")
# Bio
ISR_sub_meta_mch_bio <- ISR_sub_meta_mch[ISR_sub_meta_mch$status == "Biological", ]
dim(ISR_sub_meta_mch_bio) # 102 x 24 (53 pairs)
## [1] 102 24
# Subset counts
ISR_sub_mch_prab_bio <- ISR_sub_mch[row.names(ISR_sub_mch) %in% ISR_sub_meta_mch_bio$sample, ]
ISR_sub_mch_prab_bio <- ISR_sub_mch_prab_bio[, colSums(ISR_sub_mch_prab_bio) > 0]
dim(ISR_sub_mch_prab_bio) # 102 2079
## [1] 102 2079
# Compute distances and melt
ISR_sub_mch_prab_bio_sfd <- AD.melt.dist.sf(ISR_sub_mch_prab_bio, ISR_sub_meta_mch_bio)$dmsf
#View(ISR_sub_mch_prab_bio_sfd)
ISR_sub_mch_prab_bio_sfd$feed_mode <- ISR_sub_meta_mch_bio$feed_mode[match(ISR_sub_mch_prab_bio_sfd$Var1, ISR_sub_meta_mch_bio$sample)]
ISR_sub_mch_prab_bio_sfd$del_mode <- ISR_sub_meta_mch_bio$del_mode[match(ISR_sub_mch_prab_bio_sfd$Var1, ISR_sub_meta_mch_bio$sample)]
ggplot(ISR_sub_mch_prab_bio_sfd, aes(x=feed_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=feed_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sub_mch_feedmode.pdf", width = 3.5, height = 5.2, units = "in")
ggplot(ISR_sub_mch_prab_bio_sfd, aes(x=del_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=del_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sub_mch_delmode.pdf", width = 3.5, height = 5.2, units = "in")
# Bio
ISR_sup_meta_mch_bio <- ISR_sup_meta_mch[ISR_sup_meta_mch$status == "Biological", ]
dim(ISR_sup_meta_mch_bio) # 102 x 24 (53 pairs)
## [1] 106 24
# supset counts
ISR_sup_mch_prab_bio <- ISR_sup_mch[row.names(ISR_sup_mch) %in% ISR_sup_meta_mch_bio$sample, ]
ISR_sup_mch_prab_bio <- ISR_sup_mch_prab_bio[, colSums(ISR_sup_mch_prab_bio) > 0]
dim(ISR_sup_mch_prab_bio) # 102 2079
## [1] 106 1580
# Compute distances and melt
ISR_sup_mch_prab_bio_sfd <- AD.melt.dist.sf(ISR_sup_mch_prab_bio, ISR_sup_meta_mch_bio)$dmsf
#View(ISR_sup_mch_prab_bio_sfd)
ISR_sup_mch_prab_bio_sfd$feed_mode <- ISR_sup_meta_mch_bio$feed_mode[match(ISR_sup_mch_prab_bio_sfd$Var1, ISR_sup_meta_mch_bio$sample)]
ISR_sup_mch_prab_bio_sfd$del_mode <- ISR_sup_meta_mch_bio$del_mode[match(ISR_sup_mch_prab_bio_sfd$Var1, ISR_sup_meta_mch_bio$sample)]
ggplot(ISR_sup_mch_prab_bio_sfd, aes(x=feed_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=feed_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sup_mch_feedmode.pdf", width = 3.5, height = 5.2, units = "in")
ggplot(ISR_sup_mch_prab_bio_sfd, aes(x=del_mode, y = value)) + geom_violin(alpha=0.7, width=0.95, lwd=0.1, aes(fill=del_mode)) +
stat_compare_means(label.y = 0.4, label.x.npc = 0.5) +
#scale_fill_manual(values=c("cornflowerblue", "blueviolet", "#7B8492", "#827290")) +
geom_boxplot(alpha=0.1, width=0.25, lwd=0.2, outlier.shape = NA) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), limits = c(0,1.2), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0, yend=1, lwd=0.4, geom="segment") +
labs(x = "", y = "Mother-Child Dissimilarities (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=15), hjust = 0.35), axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/ISR_sup_mch_delmode.pdf", width = 3.5, height = 5.2, units = "in")
#### For Subject Type Analysis
# Calculate Centroid Distances
ISR_all_sites_type_dists <- data.frame(betadisper(vegdist(ISR_all_sites_mch_counts_prab), factor(ISR_all_sites_mch_counts_prab.mds.df$type))$distances)
colnames(ISR_all_sites_type_dists) <- "cent_dist"
# Calculate Centroid Distances
core_all_sites_type_dists <- data.frame(betadisper(vegdist(core_all_sites_mch_counts_prab), factor(core_all_sites_mch_counts_prab.mds.df$type))$distances)
colnames(core_all_sites_type_dists) <- "cent_dist"
# Make list of common samples in ISR and Core
ISR_all_sites_sample_names <- gsub("_ISR", "", row.names(ISR_all_sites_mch_counts_prab))
# Generate common samples
ISR_core_common_samps <- intersect(ISR_all_sites_sample_names, row.names(core_all_sites_mch_counts_prab))
length(ISR_core_common_samps) # 641
## [1] 641
# Subset ISR table
# Make new column
ISR_all_sites_type_dists$samps <- gsub("_ISR", "", row.names(ISR_all_sites_type_dists))
core_all_sites_type_dists$samps <- row.names(core_all_sites_type_dists)
# Subset
ISR_all_sites_type_dists_matched <- ISR_all_sites_type_dists[ISR_all_sites_type_dists$samps %in% ISR_core_common_samps, ]
nrow(ISR_all_sites_type_dists_matched) # 641
## [1] 641
# Subset core table
core_all_sites_type_dists_matched <- core_all_sites_type_dists[row.names(core_all_sites_type_dists) %in% ISR_core_common_samps, ]
nrow(core_all_sites_type_dists_matched) # 641
## [1] 641
# Add Core cent dists to ISR cent dists table
ISR_all_sites_type_dists_matched$core_sub_cent_dists <- core_all_sites_type_dists_matched$cent_dist[match(ISR_all_sites_type_dists_matched$samps, ISR_all_sites_type_dists_matched$samps)]
colnames(ISR_all_sites_type_dists_matched) <- c("Strain Level \nSubject Type Cent. Dists", "Samples", "Species Level \nSubject Type Cent. Dists")
# Melt table for plotting
Compare_ISR_core_sub_centdists <- melt(ISR_all_sites_type_dists_matched)
## Using Samples as id variables
ggplot(Compare_ISR_core_sub_centdists, aes(x=variable, y=value)) + geom_boxplot(aes(fill=variable), lwd=0.2, alpha=0.9, outlier.size = 0.2, width=0.5) +
scale_fill_manual(values = c("darkred", "darkgreen")) +
stat_compare_means(paired = TRUE, label.x.npc = 0.65, label.y.npc = 0.9, label = "p.signif", size=6,
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05), symbols = c("***", "**", "*", "ns"))) +
scale_y_continuous(breaks=c(0.2, 0.4, 0.6, 0.8), limits = c(0.1,0.9), expand = c(0,0)) +
annotate(x=0.4, xend=0.4, y=0.2, yend=0.8, lwd=0.3, geom="segment") +
labs(x = "", y = " Centroid Distances (Bray-Curtis)") + theme_classic() +
theme(plot.title = element_blank(),
axis.text.x = element_text(size=9, face="bold", color="black", hjust = 0.5), axis.line.x = element_line(size = 0.2),
axis.text.y = element_text(size=9, face="bold", color="black"), axis.title.y = element_text(size=12, face="bold", color="black", margin = margin(r=10), hjust = 0.35),
axis.line.y = element_blank(),
plot.margin = unit(c(0.1,0,0,0), "cm"), legend.position = "none")
#ggsave(file="output/Cent Dist Sub Comp.pdf", width = 4, height = 5, units = "in")
# For Subject Type Analysis
# ISR
ISR_all_sites_site_dists <- data.frame(betadisper(vegdist(ISR_all_sites_mch_counts_prab), factor(ISR_all_sites_mch_counts_prab.mds.df$type))$distances)
colnames(ISR_all_sites_site_dists) <- "cent_dist"
# Core Species
core_all_sites_site_dists <- data.frame(betadisper(vegdist(core_all_sites_mch_counts_prab), factor(core_all_sites_mch_counts_prab.mds.df$type))$distances)
colnames(core_all_sites_site_dists) <- "cent_dist"
# Subset ISR table
# Make new column
ISR_all_sites_site_dists$samps <- gsub("_ISR", "", row.names(ISR_all_sites_site_dists))
core_all_sites_site_dists$samps <- row.names(core_all_sites_site_dists)
# Subset
ISR_all_sites_site_dists_matched <- ISR_all_sites_site_dists[ISR_all_sites_site_dists$samps %in% ISR_core_common_samps, ]
nrow(ISR_all_sites_site_dists_matched) # 641
## [1] 641
# Subset core table
core_all_sites_site_dists_matched <- core_all_sites_site_dists[row.names(core_all_sites_site_dists) %in% ISR_core_common_samps, ]
nrow(core_all_sites_site_dists_matched) # 641
## [1] 641
# Combine ISR & core data
ISR_all_sites_site_dists_matched$core_sub_cent_dists <- core_all_sites_site_dists_matched$cent_dist[match(ISR_all_sites_site_dists_matched$samps, ISR_all_sites_site_dists_matched$samps)]
colnames(ISR_all_sites_site_dists_matched) <- c("Strain Level \nSampling Site Cent. Dists", "Samples", "Species Level \nSampling Site Cent. Dists")
Compare_ISR_core_site_centdists <- melt(ISR_all_sites_site_dists_matched)
## Using Samples as id variables
Compare_ISR_core_site_centdists$site <- all_site_samples_core$site[match(Compare_ISR_core_site_centdists$Samples, all_site_samples_core$samples)]